Setup¶

In [1]:
# Base libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
# NetworkX
import networkx as nx
import osmnx as ox
# OS environment setup
from local_directories import *
In [2]:
# Notebook-wide parameters
# Radius passed to the ego-graph calls further down (measured on the edge "length" attribute)
max_distance: int = 500
# Minimum neighbourhood size; not referenced by the cells visible in this notebook
neighbourhood_min_nodes: int = 8
# Seed value kept for reproducibility; it is only defined here, not applied to any generator
random_seed: int = 2674

Load data¶

In [3]:
# Load Leicester's graph from GraphML and derive a projected copy with OSMnx
leicester_graphml_path = bulk_storage_directory + "/osmnx/raw_excluded/leicester-1864.graphml"
leicester_osmnx_graph = ox.io.load_graphml(leicester_graphml_path)
leicester_osmnx_graph_prj = ox.project_graph(leicester_osmnx_graph)
In [4]:
# Number of nodes in the unprojected graph
leicester_osmnx_graph.number_of_nodes()
Out[4]:
13293
In [5]:
# Draw the projected network: black nodes and thin black edges on a white background
network_plot_style = dict(
    node_size=5,
    node_color="#000000",
    edge_color="#000000",
    edge_linewidth=0.1,
    bgcolor="#ffffff",
    figsize=(16, 16),
)
ox.plot_graph(leicester_osmnx_graph_prj, **network_plot_style)
Out[5]:
(<Figure size 1600x1600 with 1 Axes>, <Axes: >)
In [6]:
# Convert graph to dataframe version: one row per node, holding the node's
# attributes plus its OSMnx node id.
# The rows are collected first and the DataFrame is built once. Concatenating a
# one-row frame per node re-copies the accumulated frame on every iteration
# (quadratic in the number of nodes) and leaves every row with index label 0;
# building it in one go is linear and yields a regular 0..n-1 index.
leicester_node_records = []
for node, node_attributes in leicester_osmnx_graph_prj.nodes(data=True):
    # The id is also written onto the graph's own node attributes, as before.
    node_attributes["osmnx_node_id"] = int(node)
    leicester_node_records.append(dict(node_attributes))
leicester_osmnx_graph_prj_df = pd.DataFrame(leicester_node_records)
leicester_osmnx_graph_prj_df.head()
Out[6]:
y x street_count elevation elevation_aster elevation_srtm lon lat osmnx_node_id ref highway
0 5.829804e+06 622151.977595 3 72.0 35 72 -1.196195 52.604506 194739 NaN NaN
0 5.829991e+06 622098.041002 3 72.0 45 72 -1.196922 52.606196 1551014281 NaN NaN
0 5.828827e+06 622259.813792 2 79.0 57 79 -1.194965 52.595696 326312 21 motorway_junction
0 5.830107e+06 622077.742140 3 79.0 43 79 -1.197179 52.607245 326320 21 motorway_junction
0 5.829673e+06 622220.645785 3 74.0 35 74 -1.195230 52.603314 2627867454 NaN NaN
In [7]:
# Load Leicester's node embeddings (columns shown below: osmnx_node_id, EMB000, EMB001)
leicester_emb_csv_path = this_repo_directory + "/data/leicester-1864_emb_gnnuf_model_v0-5.csv"
leicester_emb_df = pd.read_csv(leicester_emb_csv_path)
leicester_emb_df.head()
Out[7]:
osmnx_node_id EMB000 EMB001
0 337976 -0.212304 -0.563564
1 337979 -0.322662 -0.882213
2 337983 -0.009132 0.948856
3 337985 -0.136350 0.965531
4 337986 -0.203456 0.447374

Explore embeddings¶

In [8]:
# Interactive scatter of the two embedding dimensions; the node id is shown on hover
light_grey_axis_style = dict(
    showgrid=True, gridwidth=1, gridcolor='#cccccc',
    zeroline=True, zerolinewidth=1, zerolinecolor='#cccccc',
)
fig = px.scatter(
    leicester_emb_df,
    x="EMB000", y="EMB001",
    hover_data=['osmnx_node_id'],
    width=800, height=800,
)
fig.update_layout(plot_bgcolor="#ffffff")
# Same grid and zero-line styling on both axes
fig.update_xaxes(**light_grey_axis_style)
fig.update_yaxes(**light_grey_axis_style)
fig.show()
In [9]:
# Copy each node's embedding onto the projected graph as node attributes
# "EMB000" and "EMB001"; nodes without an embedding row get None for both.
# The embeddings are indexed once by node id, so each node costs a single dict
# lookup instead of several full scans of the DataFrame. Converting the values
# one scalar at a time also avoids float() on a one-element array, which NumPy
# has deprecated. If an id occurs more than once, the last row is used.
leicester_emb_by_node = {
    node_id: (float(emb000), float(emb001))
    for node_id, emb000, emb001 in zip(
        leicester_emb_df["osmnx_node_id"],
        leicester_emb_df["EMB000"],
        leicester_emb_df["EMB001"],
    )
}
for node, node_attributes in leicester_osmnx_graph_prj.nodes(data=True):
    emb000, emb001 = leicester_emb_by_node.get(node, (None, None))
    node_attributes["EMB000"] = emb000
    node_attributes["EMB001"] = emb001
In [10]:
# Map of the projected network with nodes coloured by the first embedding dimension
emb000_per_node = [
    node_attributes["EMB000"]
    for _node, node_attributes in leicester_osmnx_graph_prj.nodes(data=True)
]
ox.plot_graph(
    leicester_osmnx_graph_prj,
    node_color=emb000_per_node,
    node_size=10,
    bgcolor="#ffffff",
    figsize=(16, 16),
)
Out[10]:
(<Figure size 1600x1600 with 1 Axes>, <Axes: >)
In [11]:
# Map of the projected network with nodes coloured by the second embedding dimension
emb001_per_node = [
    node_attributes["EMB001"]
    for _node, node_attributes in leicester_osmnx_graph_prj.nodes(data=True)
]
ox.plot_graph(
    leicester_osmnx_graph_prj,
    node_color=emb001_per_node,
    node_size=10,
    bgcolor="#ffffff",
    figsize=(16, 16),
)
Out[11]:
(<Figure size 1600x1600 with 1 Axes>, <Axes: >)

Correlations with node and ego-graph stats¶

Check correlations between the node embeddings, the ego-graph pooled embeddings, and OSMnx-derived statistics: the centrality of each node computed within the city-wide network, the centrality of each node computed within the ego-graph used to create its embedding, and the basic statistics of that ego-graph.

In [12]:
# Build one table per node by joining four sources on "osmnx_node_id".
# merge() is called with its default (inner) join, so only ids present in
# every source are kept.

# Ego-graph pooled embeddings (columns renamed so they do not clash with the node embeddings)
leicester_emb_pooled_df = pd.read_csv(
    this_repo_directory + "/data/leicester-1864_emb-pool_gnnuf_model_v0-5.csv"
).rename(columns={"EMB000":"EMB000pooled", "EMB001":"EMB001pooled"})

# Centrality including node-based and ego-graph-based
leicester_centrality_df = pd.read_csv(
    this_repo_directory + "/data/leicester-1864_stats_node_centrality_with_egograph_dist500.csv"
).rename(columns={"node_id":"osmnx_node_id"})

# Ego-graph basic stats: rows without a node id are dropped and only the listed columns are kept
egograph_basic_columns = [
    "osmnx_node_id", "n", "m", "k_avg", "edge_length_total", "edge_length_avg",
    "streets_per_node_avg", "intersection_count", "street_length_total",
    "street_segment_count", "street_length_avg", "circuity_avg",
]
leicester_egograph_basic_df = pd.read_csv(
    this_repo_directory + "/data/leicester-1864_stats_egograph_basic_dist500.csv"
).rename(columns={"node_id":"osmnx_node_id"}).dropna(subset=["osmnx_node_id"])[egograph_basic_columns]

leicester_emb_stats_for_corr = (
    leicester_emb_df[["osmnx_node_id", "EMB000", "EMB001"]]
    .merge(leicester_emb_pooled_df, on="osmnx_node_id")
    .merge(leicester_centrality_df, on="osmnx_node_id")
    .merge(leicester_egograph_basic_df, on="osmnx_node_id")
)
In [13]:
# Preview the first five rows of the merged table
leicester_emb_stats_for_corr.head(n=5)
Out[13]:
osmnx_node_id EMB000 EMB001 EMB000pooled EMB001pooled closeness_networkwide betweenness_networkwide closeness_egograph betweenness_egograph n m k_avg edge_length_total edge_length_avg streets_per_node_avg intersection_count street_length_total street_segment_count street_length_avg circuity_avg
0 337976 -0.212304 -0.563564 -0.214178 0.073930 0.000000 0.000000 0.000000 0.000000 11.0 11.0 2.0 1261.861 114.714636 3.0 11.0 1261.861 11.0 114.714636 1.038343
1 337979 -0.322662 -0.882213 -0.112598 -0.131984 0.000150 0.000149 0.166667 0.106061 13.0 13.0 2.0 2126.471 163.574692 3.0 13.0 2126.471 13.0 163.574692 1.030988
2 337983 -0.009132 0.948856 -0.074082 0.194142 0.000285 0.000298 0.230769 0.115385 14.0 14.0 2.0 1870.996 133.642571 3.0 14.0 1870.996 14.0 133.642571 1.048630
3 337985 -0.136350 0.965531 -0.095356 0.193887 0.015656 0.000000 0.274725 0.000000 14.0 14.0 2.0 1815.929 129.709214 3.0 14.0 1815.929 14.0 129.709214 1.050192
4 337986 -0.203456 0.447374 -0.074082 0.194142 0.000249 0.000373 0.198381 0.096154 14.0 14.0 2.0 1870.996 133.642571 3.0 14.0 1870.996 14.0 133.642571 1.048630
In [14]:
# Kendall's tau between every pair of columns (the node id is not a variable, so it is excluded)
leicester_kendall_corr = (
    leicester_emb_stats_for_corr
    .drop(columns=["osmnx_node_id"])
    .corr(method="kendall")
)
print(leicester_kendall_corr)
                           EMB000    EMB001  EMB000pooled  EMB001pooled   
EMB000                   1.000000 -0.050229      0.414767     -0.117148  \
EMB001                  -0.050229  1.000000     -0.104923      0.393005   
EMB000pooled             0.414767 -0.104923      1.000000     -0.170566   
EMB001pooled            -0.117148  0.393005     -0.170566      1.000000   
closeness_networkwide    0.262182 -0.194019      0.364707     -0.337499   
betweenness_networkwide  0.242117 -0.025702      0.117417     -0.155388   
closeness_egograph       0.295632  0.135150      0.335473      0.202459   
betweenness_egograph     0.260210  0.097279      0.125096     -0.017320   
n                       -0.033401 -0.103946     -0.138311     -0.226253   
m                        0.013223 -0.100975     -0.068330     -0.212787   
k_avg                    0.260863  0.005101      0.376507      0.036726   
edge_length_total        0.210405 -0.130952      0.208221     -0.245809   
edge_length_avg          0.370231 -0.044676      0.579544     -0.021938   
streets_per_node_avg     0.280222 -0.231995      0.430593     -0.420593   
intersection_count       0.047278 -0.143725     -0.019241     -0.302492   
street_length_total      0.191975 -0.163191      0.190095     -0.315328   
street_segment_count     0.008809 -0.133773     -0.069979     -0.284915   
street_length_avg        0.365477 -0.044273      0.588756     -0.014573   
circuity_avg            -0.028421  0.131059     -0.065721      0.225035   

                         closeness_networkwide  betweenness_networkwide   
EMB000                                0.262182                 0.242117  \
EMB001                               -0.194019                -0.025702   
EMB000pooled                          0.364707                 0.117417   
EMB001pooled                         -0.337499                -0.155388   
closeness_networkwide                 1.000000                 0.245741   
betweenness_networkwide               0.245741                 1.000000   
closeness_egograph                    0.060927                 0.037740   
betweenness_egograph                  0.146073                 0.666984   
n                                     0.047026                 0.268784   
m                                     0.058577                 0.266006   
k_avg                                 0.090216                 0.047317   
edge_length_total                     0.243250                 0.369374   
edge_length_avg                       0.308200                 0.140604   
streets_per_node_avg                  0.448797                 0.226158   
intersection_count                    0.157044                 0.319659   
street_length_total                   0.284100                 0.390023   
street_segment_count                  0.107291                 0.295211   
street_length_avg                     0.301521                 0.126597   
circuity_avg                         -0.116848                -0.028826   

                         closeness_egograph  betweenness_egograph         n   
EMB000                             0.295632              0.260210 -0.033401  \
EMB001                             0.135150              0.097279 -0.103946   
EMB000pooled                       0.335473              0.125096 -0.138311   
EMB001pooled                       0.202459             -0.017320 -0.226253   
closeness_networkwide              0.060927              0.146073  0.047026   
betweenness_networkwide            0.037740              0.666984  0.268784   
closeness_egograph                 1.000000              0.205350 -0.452824   
betweenness_egograph               0.205350              1.000000  0.145905   
n                                 -0.452824              0.145905  1.000000   
m                                 -0.367824              0.157868  0.891733   
k_avg                              0.323223              0.100583  0.013712   
edge_length_total                 -0.146388              0.270640  0.589867   
edge_length_avg                    0.443310              0.178665 -0.296360   
streets_per_node_avg              -0.037485              0.092405  0.097850   
intersection_count                -0.420096              0.171545  0.826980   
street_length_total               -0.219952              0.256335  0.617244   
street_segment_count              -0.440343              0.155661  0.902224   
street_length_avg                  0.460244              0.169671 -0.292572   
circuity_avg                       0.082450              0.038269 -0.118638   

                                m     k_avg  edge_length_total   
EMB000                   0.013223  0.260863           0.210405  \
EMB001                  -0.100975  0.005101          -0.130952   
EMB000pooled            -0.068330  0.376507           0.208221   
EMB001pooled            -0.212787  0.036726          -0.245809   
closeness_networkwide    0.058577  0.090216           0.243250   
betweenness_networkwide  0.266006  0.047317           0.369374   
closeness_egograph      -0.367824  0.323223          -0.146388   
betweenness_egograph     0.157868  0.100583           0.270640   
n                        0.891733  0.013712           0.589867   
m                        1.000000  0.132379           0.665304   
k_avg                    0.132379  1.000000           0.305397   
edge_length_total        0.665304  0.305397           1.000000   
edge_length_avg         -0.234006  0.292995           0.104432   
streets_per_node_avg     0.117131  0.151434           0.321869   
intersection_count       0.810128  0.057609           0.674507   
street_length_total      0.656864  0.211502           0.879629   
street_segment_count     0.878092  0.040801           0.635415   
street_length_avg       -0.224905  0.323991           0.108293   
circuity_avg            -0.118920 -0.000375          -0.089911   

                         edge_length_avg  streets_per_node_avg   
EMB000                          0.370231              0.280222  \
EMB001                         -0.044676             -0.231995   
EMB000pooled                    0.579544              0.430593   
EMB001pooled                   -0.021938             -0.420593   
closeness_networkwide           0.308200              0.448797   
betweenness_networkwide         0.140604              0.226158   
closeness_egograph              0.443310             -0.037485   
betweenness_egograph            0.178665              0.092405   
n                              -0.296360              0.097850   
m                              -0.234006              0.117131   
k_avg                           0.292995              0.151434   
edge_length_total               0.104432              0.321869   
edge_length_avg                 1.000000              0.340156   
streets_per_node_avg            0.340156              1.000000   
intersection_count             -0.166574              0.279544   
street_length_total             0.080524              0.389084   
street_segment_count           -0.232123              0.197164   
street_length_avg               0.904994              0.316873   
circuity_avg                    0.048389             -0.153925   

                         intersection_count  street_length_total   
EMB000                             0.047278             0.191975  \
EMB001                            -0.143725            -0.163191   
EMB000pooled                      -0.019241             0.190095   
EMB001pooled                      -0.302492            -0.315328   
closeness_networkwide              0.157044             0.284100   
betweenness_networkwide            0.319659             0.390023   
closeness_egograph                -0.420096            -0.219952   
betweenness_egograph               0.171545             0.256335   
n                                  0.826980             0.617244   
m                                  0.810128             0.656864   
k_avg                              0.057609             0.211502   
edge_length_total                  0.674507             0.879629   
edge_length_avg                   -0.166574             0.080524   
streets_per_node_avg               0.279544             0.389084   
intersection_count                 1.000000             0.739393   
street_length_total                0.739393             1.000000   
street_segment_count               0.910502             0.687454   
street_length_avg                 -0.170992             0.084041   
circuity_avg                      -0.143807            -0.106058   

                         street_segment_count  street_length_avg  circuity_avg  
EMB000                               0.008809           0.365477     -0.028421  
EMB001                              -0.133773          -0.044273      0.131059  
EMB000pooled                        -0.069979           0.588756     -0.065721  
EMB001pooled                        -0.284915          -0.014573      0.225035  
closeness_networkwide                0.107291           0.301521     -0.116848  
betweenness_networkwide              0.295211           0.126597     -0.028826  
closeness_egograph                  -0.440343           0.460244      0.082450  
betweenness_egograph                 0.155661           0.169671      0.038269  
n                                    0.902224          -0.292572     -0.118638  
m                                    0.878092          -0.224905     -0.118920  
k_avg                                0.040801           0.323991     -0.000375  
edge_length_total                    0.635415           0.108293     -0.089911  
edge_length_avg                     -0.232123           0.904994      0.048389  
streets_per_node_avg                 0.197164           0.316873     -0.153925  
intersection_count                   0.910502          -0.170992     -0.143807  
street_length_total                  0.687454           0.084041     -0.106058  
street_segment_count                 1.000000          -0.233779     -0.132164  
street_length_avg                   -0.233779           1.000000      0.043635  
circuity_avg                        -0.132164           0.043635      1.000000  
In [15]:
# Same correlation table using Spearman's rho, to compare against Kendall's tau
leicester_spearman_corr = (
    leicester_emb_stats_for_corr
    .drop(columns=["osmnx_node_id"])
    .corr(method="spearman")
)
print(leicester_spearman_corr)
                           EMB000    EMB001  EMB000pooled  EMB001pooled   
EMB000                   1.000000 -0.077134      0.598860     -0.184985  \
EMB001                  -0.077134  1.000000     -0.161475      0.569221   
EMB000pooled             0.598860 -0.161475      1.000000     -0.270270   
EMB001pooled            -0.184985  0.569221     -0.270270      1.000000   
closeness_networkwide    0.388640 -0.292064      0.537505     -0.505571   
betweenness_networkwide  0.354346 -0.053826      0.170578     -0.225798   
closeness_egograph       0.437037  0.202000      0.490640      0.291693   
betweenness_egograph     0.380707  0.151481      0.181311     -0.027183   
n                       -0.049975 -0.157148     -0.204113     -0.329446   
m                        0.021164 -0.152845     -0.101058     -0.310380   
k_avg                    0.388864  0.003666      0.545514      0.045068   
edge_length_total        0.320914 -0.202643      0.310028     -0.358725   
edge_length_avg          0.534784 -0.066179      0.775982     -0.038231   
streets_per_node_avg     0.414339 -0.348581      0.610777     -0.605623   
intersection_count       0.072714 -0.218416     -0.026976     -0.434794   
street_length_total      0.294263 -0.253735      0.286724     -0.456843   
street_segment_count     0.014611 -0.203271     -0.103784     -0.410763   
street_length_avg        0.528989 -0.065379      0.785537     -0.026850   
circuity_avg            -0.043169  0.195740     -0.100317      0.327021   

                         closeness_networkwide  betweenness_networkwide   
EMB000                                0.388640                 0.354346  \
EMB001                               -0.292064                -0.053826   
EMB000pooled                          0.537505                 0.170578   
EMB001pooled                         -0.505571                -0.225798   
closeness_networkwide                 1.000000                 0.350715   
betweenness_networkwide               0.350715                 1.000000   
closeness_egograph                    0.087531                 0.051907   
betweenness_egograph                  0.212391                 0.835012   
n                                     0.069174                 0.382218   
m                                     0.087432                 0.380044   
k_avg                                 0.129970                 0.066617   
edge_length_total                     0.360226                 0.529066   
edge_length_avg                       0.456250                 0.204010   
streets_per_node_avg                  0.641996                 0.326195   
intersection_count                    0.233107                 0.453933   
street_length_total                   0.418098                 0.556922   
street_segment_count                  0.159911                 0.419911   
street_length_avg                     0.448151                 0.184167   
circuity_avg                         -0.176367                -0.041270   

                         closeness_egograph  betweenness_egograph         n   
EMB000                             0.437037              0.380707 -0.049975  \
EMB001                             0.202000              0.151481 -0.157148   
EMB000pooled                       0.490640              0.181311 -0.204113   
EMB001pooled                       0.291693             -0.027183 -0.329446   
closeness_networkwide              0.087531              0.212391  0.069174   
betweenness_networkwide            0.051907              0.835012  0.382218   
closeness_egograph                 1.000000              0.297919 -0.629959   
betweenness_egograph               0.297919              1.000000  0.213167   
n                                 -0.629959              0.213167  1.000000   
m                                 -0.523668              0.230822  0.980338   
k_avg                              0.458938              0.152805  0.015981   
edge_length_total                 -0.216647              0.397477  0.784142   
edge_length_avg                    0.620514              0.255275 -0.425638   
streets_per_node_avg              -0.060167              0.143119  0.144189   
intersection_count                -0.590339              0.251619  0.953861   
street_length_total               -0.322573              0.381201  0.806319   
street_segment_count              -0.616406              0.228824  0.983940   
street_length_avg                  0.640899              0.242273 -0.420878   
circuity_avg                       0.123876              0.054817 -0.173852   

                                m     k_avg  edge_length_total   
EMB000                   0.021164  0.388864           0.320914  \
EMB001                  -0.152845  0.003666          -0.202643   
EMB000pooled            -0.101058  0.545514           0.310028   
EMB001pooled            -0.310380  0.045068          -0.358725   
closeness_networkwide    0.087432  0.129970           0.360226   
betweenness_networkwide  0.380044  0.066617           0.529066   
closeness_egograph      -0.523668  0.458938          -0.216647   
betweenness_egograph     0.230822  0.152805           0.397477   
n                        0.980338  0.015981           0.784142   
m                        1.000000  0.188244           0.851935   
k_avg                    0.188244  1.000000           0.432809   
edge_length_total        0.851935  0.432809           1.000000   
edge_length_avg         -0.341298  0.432676           0.160137   
streets_per_node_avg     0.172351  0.206537           0.464561   
intersection_count       0.944876  0.071353           0.857755   
street_length_total      0.842110  0.292745           0.972294   
street_segment_count     0.971088  0.050415           0.827154   
street_length_avg       -0.328617  0.474856           0.165242   
circuity_avg            -0.173620 -0.001432          -0.132332   

                         edge_length_avg  streets_per_node_avg   
EMB000                          0.534784              0.414339  \
EMB001                         -0.066179             -0.348581   
EMB000pooled                    0.775982              0.610777   
EMB001pooled                   -0.038231             -0.605623   
closeness_networkwide           0.456250              0.641996   
betweenness_networkwide         0.204010              0.326195   
closeness_egograph              0.620514             -0.060167   
betweenness_egograph            0.255275              0.143119   
n                              -0.425638              0.144189   
m                              -0.341298              0.172351   
k_avg                           0.432676              0.206537   
edge_length_total               0.160137              0.464561   
edge_length_avg                 1.000000              0.499462   
streets_per_node_avg            0.499462              1.000000   
intersection_count             -0.240841              0.406843   
street_length_total             0.128629              0.558292   
street_segment_count           -0.338067              0.291215   
street_length_avg               0.983105              0.469362   
circuity_avg                    0.067899             -0.230816   

                         intersection_count  street_length_total   
EMB000                             0.072714             0.294263  \
EMB001                            -0.218416            -0.253735   
EMB000pooled                      -0.026976             0.286724   
EMB001pooled                      -0.434794            -0.456843   
closeness_networkwide              0.233107             0.418098   
betweenness_networkwide            0.453933             0.556922   
closeness_egograph                -0.590339            -0.322573   
betweenness_egograph               0.251619             0.381201   
n                                  0.953861             0.806319   
m                                  0.944876             0.842110   
k_avg                              0.071353             0.292745   
edge_length_total                  0.857755             0.972294   
edge_length_avg                   -0.240841             0.128629   
streets_per_node_avg               0.406843             0.558292   
intersection_count                 1.000000             0.904625   
street_length_total                0.904625             1.000000   
street_segment_count               0.985890             0.867512   
street_length_avg                 -0.248148             0.133033   
circuity_avg                      -0.209816            -0.156784   

                         street_segment_count  street_length_avg  circuity_avg  
EMB000                               0.014611           0.528989     -0.043169  
EMB001                              -0.203271          -0.065379      0.195740  
EMB000pooled                        -0.103784           0.785537     -0.100317  
EMB001pooled                        -0.410763          -0.026850      0.327021  
closeness_networkwide                0.159911           0.448151     -0.176367  
betweenness_networkwide              0.419911           0.184167     -0.041270  
closeness_egograph                  -0.616406           0.640899      0.123876  
betweenness_egograph                 0.228824           0.242273      0.054817  
n                                    0.983940          -0.420878     -0.173852  
m                                    0.971088          -0.328617     -0.173620  
k_avg                                0.050415           0.474856     -0.001432  
edge_length_total                    0.827154           0.165242     -0.132332  
edge_length_avg                     -0.338067           0.983105      0.067899  
streets_per_node_avg                 0.291215           0.469362     -0.230816  
intersection_count                   0.985890          -0.248148     -0.209816  
street_length_total                  0.867512           0.133033     -0.156784  
street_segment_count                 1.000000          -0.340965     -0.192780  
street_length_avg                   -0.340965           1.000000      0.060435  
circuity_avg                        -0.192780           0.060435      1.000000  
In [16]:
# Pairwise 2-D histograms of all embedding and statistic columns (node id excluded)
leicester_pairplot_data = leicester_emb_stats_for_corr.drop(columns=["osmnx_node_id"])
sns.pairplot(leicester_pairplot_data, kind="hist")
Out[16]:
<seaborn.axisgrid.PairGrid at 0x1a3613d90>
In [17]:
# Save the merged embeddings-and-statistics table to the repository data folder (row index not written)
leicester_emb_stats_csv_path = (
    this_repo_directory + "/data/leicester-1864_emb_gnnuf_model_v0-5_incl-pool-with-stats.csv"
)
leicester_emb_stats_for_corr.to_csv(leicester_emb_stats_csv_path, index=False)

Check specific nodes¶

Check specific nodes based on their embeddings and ego-graph structure

In [18]:
# Embedding row of node 6782625866
leicester_emb_df.loc[leicester_emb_df["osmnx_node_id"].eq(6782625866)]
Out[18]:
osmnx_node_id EMB000 EMB001
12711 6782625866 -0.999753 -0.943405
In [19]:
# Ego-graph of node 6782625866: everything within `max_distance` of it, measured
# on the edge "length" attribute and ignoring edge direction. The focus node is
# drawn larger and in red; all other nodes are small and white.
focus_node_id = 6782625866
ego_6782625866 = nx.ego_graph(
    leicester_osmnx_graph,
    focus_node_id,
    radius=max_distance,
    undirected=True,
    distance="length",
)
ego_6782625866_prj = ox.project_graph(ego_6782625866)
is_focus_node = [node == focus_node_id for node in ego_6782625866_prj.nodes]
ox.plot_graph(
    ego_6782625866_prj,
    node_size=[20 if is_focus else 5 for is_focus in is_focus_node],
    node_color=["#e41a1c" if is_focus else "#ffffff" for is_focus in is_focus_node],
    figsize=(5, 5),
)
Out[19]:
(<Figure size 500x500 with 1 Axes>, <Axes: >)
In [20]:
# Embedding row of node 354554417
leicester_emb_df.loc[leicester_emb_df["osmnx_node_id"].eq(354554417)]
Out[20]:
osmnx_node_id EMB000 EMB001
4733 354554417 -0.966505 0.982919
In [21]:
# Ego-graph of node 354554417: everything within `max_distance` of it, measured
# on the edge "length" attribute and ignoring edge direction. The focus node is
# drawn larger and in red; all other nodes are small and white.
focus_node_id = 354554417
ego_354554417 = nx.ego_graph(
    leicester_osmnx_graph,
    focus_node_id,
    radius=max_distance,
    undirected=True,
    distance="length",
)
ego_354554417_prj = ox.project_graph(ego_354554417)
is_focus_node = [node == focus_node_id for node in ego_354554417_prj.nodes]
ox.plot_graph(
    ego_354554417_prj,
    node_size=[20 if is_focus else 5 for is_focus in is_focus_node],
    node_color=["#e41a1c" if is_focus else "#ffffff" for is_focus in is_focus_node],
    figsize=(5, 5),
)
Out[21]:
(<Figure size 500x500 with 1 Axes>, <Axes: >)
In [22]:
# Embedding row of node 1179199412
leicester_emb_df.loc[leicester_emb_df["osmnx_node_id"].eq(1179199412)]
Out[22]:
osmnx_node_id EMB000 EMB001
8021 1179199412 0.999113 0.99909
In [23]:
# Ego-graph of node 1179199412: everything within `max_distance` of it, measured
# on the edge "length" attribute and ignoring edge direction. The focus node is
# drawn larger and in red; all other nodes are small and white.
focus_node_id = 1179199412
ego_1179199412 = nx.ego_graph(
    leicester_osmnx_graph,
    focus_node_id,
    radius=max_distance,
    undirected=True,
    distance="length",
)
ego_1179199412_prj = ox.project_graph(ego_1179199412)
is_focus_node = [node == focus_node_id for node in ego_1179199412_prj.nodes]
ox.plot_graph(
    ego_1179199412_prj,
    node_size=[20 if is_focus else 5 for is_focus in is_focus_node],
    node_color=["#e41a1c" if is_focus else "#ffffff" for is_focus in is_focus_node],
    figsize=(5, 5),
)
Out[23]:
(<Figure size 500x500 with 1 Axes>, <Axes: >)
In [24]:
# Embedding row of node 2858142815
leicester_emb_df.loc[leicester_emb_df["osmnx_node_id"].eq(2858142815)]
Out[24]:
osmnx_node_id EMB000 EMB001
11121 2858142815 0.002341 0.165836
In [25]:
# Ego-graph of node 2858142815: everything within `max_distance` of it, measured
# on the edge "length" attribute and ignoring edge direction. The focus node is
# drawn larger and in red; all other nodes are small and white.
focus_node_id = 2858142815
ego_2858142815 = nx.ego_graph(
    leicester_osmnx_graph,
    focus_node_id,
    radius=max_distance,
    undirected=True,
    distance="length",
)
ego_2858142815_prj = ox.project_graph(ego_2858142815)
is_focus_node = [node == focus_node_id for node in ego_2858142815_prj.nodes]
ox.plot_graph(
    ego_2858142815_prj,
    node_size=[20 if is_focus else 5 for is_focus in is_focus_node],
    node_color=["#e41a1c" if is_focus else "#ffffff" for is_focus in is_focus_node],
    figsize=(5, 5),
)
Out[25]:
(<Figure size 500x500 with 1 Axes>, <Axes: >)
In [26]:
# Embedding row of node 296162322
leicester_emb_df.loc[leicester_emb_df["osmnx_node_id"].eq(296162322)]
Out[26]:
osmnx_node_id EMB000 EMB001
3845 296162322 0.99999 -0.999946
In [27]:
# Ego-graph of node 296162322: everything within `max_distance` of it, measured
# on the edge "length" attribute and ignoring edge direction. The focus node is
# drawn larger and in red; all other nodes are small and white.
focus_node_id = 296162322
ego_296162322 = nx.ego_graph(
    leicester_osmnx_graph,
    focus_node_id,
    radius=max_distance,
    undirected=True,
    distance="length",
)
ego_296162322_prj = ox.project_graph(ego_296162322)
is_focus_node = [node == focus_node_id for node in ego_296162322_prj.nodes]
ox.plot_graph(
    ego_296162322_prj,
    node_size=[20 if is_focus else 5 for is_focus in is_focus_node],
    node_color=["#e41a1c" if is_focus else "#ffffff" for is_focus in is_focus_node],
    figsize=(5, 5),
)
Out[27]:
(<Figure size 500x500 with 1 Axes>, <Axes: >)

Explore patterns¶

In [28]:
# Work on a separate copy so the cluster and colour columns added below do not alter leicester_emb_df
leicester_emb_patters_df = leicester_emb_df.copy(deep=True)

Embeddings clusters plot¶

In [29]:
from sklearn.cluster import DBSCAN
# Density-based clustering of the nodes in the two-dimensional embedding space.
# Rows with a missing embedding cannot be clustered, so they are left out of the
# fit. The labels are then aligned back to the full table by index, and rows
# that were left out receive -1, the label DBSCAN uses for noise. Assigning the
# raw label array directly would fail with a length mismatch as soon as dropna()
# removed a row.
leicester_emb_df_clust = leicester_emb_df[["EMB000", "EMB001"]].dropna()
clust = DBSCAN(eps=0.11, min_samples=300)
clust_labels = pd.Series(
    clust.fit_predict(leicester_emb_df_clust),
    index=leicester_emb_df_clust.index,
)
leicester_emb_patters_df["clust"] = (
    clust_labels.reindex(leicester_emb_patters_df.index, fill_value=-1).astype(int)
)
# Number of distinct labels, counting the noise label when present
leicester_emb_patters_df["clust"].nunique()
Out[29]:
8
In [30]:
# One colour per cluster label, taken from the ColorBrewer Set1 palette, with
# the final grey entry reserved for DBSCAN noise (label -1).
# Noise is handled explicitly rather than through the negative list index -1:
# with plain indexing, cluster 8 would share the grey noise colour and any
# label above 8 would raise IndexError. Cluster colours now cycle instead.
colorbrewer_set1 = ["#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33", "#a65628", "#f781bf", "#999999"]
clust_noise_colour = colorbrewer_set1[-1]
clust_palette = colorbrewer_set1[:-1]
leicester_emb_patters_df["clust_colour"] = leicester_emb_patters_df["clust"].apply(
    lambda label: clust_noise_colour if label < 0 else clust_palette[label % len(clust_palette)]
)
leicester_emb_patters_df.head()
Out[30]:
osmnx_node_id EMB000 EMB001 clust clust_colour
0 337976 -0.212304 -0.563564 -1 #999999
1 337979 -0.322662 -0.882213 -1 #999999
2 337983 -0.009132 0.948856 -1 #999999
3 337985 -0.136350 0.965531 -1 #999999
4 337986 -0.203456 0.447374 -1 #999999
In [31]:
# Static scatter of the embedding space, one point per node, coloured by cluster
cluster_scatter_fig, ax = plt.subplots(figsize=(7, 7))
ax.set_facecolor("white")
ax.scatter(
    x=leicester_emb_patters_df["EMB000"],
    y=leicester_emb_patters_df["EMB001"],
    c=leicester_emb_patters_df["clust_colour"],
    s=5,
    edgecolors='black',
    linewidth=0.1,
)
ax.set_xlabel("Embeddings first dimension")
ax.set_ylabel("Embeddings second dimension")
plt.show()
In [32]:
# Interactive version of the cluster scatter: markers coloured by cluster
light_grey_axis_style = dict(
    showgrid=True, gridwidth=1, gridcolor='#cccccc',
    zeroline=True, zerolinewidth=1, zerolinecolor='#cccccc',
)
cluster_trace = go.Scatter(
    x=leicester_emb_patters_df["EMB000"],
    y=leicester_emb_patters_df["EMB001"],
    mode='markers',
    marker=dict(color=leicester_emb_patters_df["clust_colour"]),
)
fig = go.Figure(data=[cluster_trace])
fig.update_layout(plot_bgcolor="#ffffff", width=800, height=800)
# Same grid and zero-line styling on both axes
fig.update_xaxes(**light_grey_axis_style)
fig.update_yaxes(**light_grey_axis_style)
fig.show()

Bivariate embedding plots¶

In [33]:
# 3x3 bivariate palette: outer index is the class of x[0], inner index the class
# of x[1], each ordered low -> middle -> high.
_BIVARIATE_PALETTE = (
    ("#e8e8e8", "#e4acac", "#c85a5a"),
    ("#b0d5df", "#ad9ea5", "#985356"),
    ("#64acbe", "#627f8c", "#574249"),
)


def _three_way_class(value, lower, upper):
    """Return 0, 1 or 2 for value <= lower, value <= upper, or above (bounds inclusive)."""
    if value <= lower:
        return 0
    if value <= upper:
        return 1
    return 2


def bivariate_colour(x, limits):
    """Return the hex colour of the 3x3 bivariate class a pair of values falls into.

    Each value is classed as low / middle / high against its own pair of break
    points (upper bounds inclusive); the two classes select one palette cell.

    Parameters
    ----------
    x : sequence of two numbers
        ``x[0]`` is the first variable, ``x[1]`` the second.
    limits : 2x2 array indexed as ``limits[i, j]``
        Row ``i`` holds the lower (``j=0``) and upper (``j=1``) break point
        applied to ``x[i]``.

    Returns
    -------
    str or None
        Hex colour string, or ``None`` when either value is missing.
    """
    # NaN fails every "<=" comparison, so without this check a missing value
    # would fall through to the high/high colour. Treat NaN like None.
    if pd.isna(x[0]) or pd.isna(x[1]):
        return None
    first_class = _three_way_class(x[0], limits[0, 0], limits[0, 1])
    second_class = _three_way_class(x[1], limits[1, 0], limits[1, 1])
    return _BIVARIATE_PALETTE[first_class][second_class]

# Break points at the 1/3 and 2/3 quantiles of each embedding dimension,
# transposed so that row 0 belongs to EMB000 and row 1 to EMB001.
leicester_emb_quantiles = (
    leicester_emb_df[["EMB000", "EMB001"]]
    .quantile([1/3, 2/3])
    .to_numpy()
    .T
)

# Classify every row into one of the nine bivariate colours.
leicester_emb_patters_df["bivariate_colour"] = leicester_emb_patters_df.apply(
    lambda row: bivariate_colour(
        [row["EMB000"], row["EMB001"]],
        leicester_emb_quantiles,
    ),
    axis=1,
)
leicester_emb_patters_df.head()
Out[33]:
osmnx_node_id EMB000 EMB001 clust clust_colour bivariate_colour
0 337976 -0.212304 -0.563564 -1 #999999 #e8e8e8
1 337979 -0.322662 -0.882213 -1 #999999 #e8e8e8
2 337983 -0.009132 0.948856 -1 #999999 #985356
3 337985 -0.136350 0.965531 -1 #999999 #c85a5a
4 337986 -0.203456 0.447374 -1 #999999 #e4acac
In [34]:
# Static scatter of the two embedding dimensions, coloured by bivariate class.
ax = plt.figure(figsize=(7, 7)).add_subplot()
ax.set_facecolor("white")
ax.scatter(
    x=leicester_emb_patters_df["EMB000"],
    y=leicester_emb_patters_df["EMB001"],
    c=leicester_emb_patters_df["bivariate_colour"],
    s=10,
    edgecolors="black",
    linewidth=0.1,
)
ax.set_xlabel("Embeddings first dimension")
ax.set_ylabel("Embeddings second dimension")
plt.show()
In [35]:
# Interactive version of the bivariate scatter.
bivariate_trace = go.Scatter(
    x=leicester_emb_patters_df["EMB000"],
    y=leicester_emb_patters_df["EMB001"],
    mode="markers",
    marker={"color": leicester_emb_patters_df["bivariate_colour"]},
)
fig = go.Figure(data=[bivariate_trace])
fig.update_layout(plot_bgcolor="#ffffff", width=800, height=800)

# Both axes share the same light-grey grid and zero line.
bivariate_axis_style = {
    "showgrid": True, "gridwidth": 1, "gridcolor": "#cccccc",
    "zeroline": True, "zerolinewidth": 1, "zerolinecolor": "#cccccc",
}
fig.update_xaxes(**bivariate_axis_style)
fig.update_yaxes(**bivariate_axis_style)
fig.show()

Maps¶

In [36]:
leicester_osmnx_bivariate = leicester_osmnx_graph_prj.copy()

# Index the colour columns by node id once, so each node costs a dictionary
# lookup instead of a full boolean scan of the dataframe. keep="first" matches
# taking the first matching row when a node id is repeated.
node_colour_lookup = (
    leicester_emb_patters_df
    .drop_duplicates(subset="osmnx_node_id", keep="first")
    .set_index("osmnx_node_id")[["bivariate_colour", "clust_colour"]]
    .to_dict(orient="index")
)

for node, node_attributes in leicester_osmnx_bivariate.nodes(data=True):
    node_colours = node_colour_lookup.get(node)
    if node_colours is None:
        # Node has no row in the patterns dataframe: small black marker.
        node_attributes["bivariate_colour"] = "#000000"
        node_attributes["clust_colour"] = "#000000"
        node_attributes["node_size"] = 1
    else:
        node_attributes["bivariate_colour"] = node_colours["bivariate_colour"]
        node_attributes["clust_colour"] = node_colours["clust_colour"]
        node_attributes["node_size"] = 7
In [37]:
# Cluster map: nodes take their cluster colour; nodes carrying the noise colour
# (last palette entry) are drawn at size 1, the rest at their stored node_size.
cluster_noise_colour = colorbrewer_set1[-1]
cluster_map_colours = []
cluster_map_sizes = []
for _, node_attributes in leicester_osmnx_bivariate.nodes(data=True):
    cluster_map_colours.append(node_attributes["clust_colour"])
    if node_attributes["clust_colour"] != cluster_noise_colour:
        cluster_map_sizes.append(node_attributes["node_size"])
    else:
        cluster_map_sizes.append(1)

ox.plot_graph(
    leicester_osmnx_bivariate,
    node_color=cluster_map_colours,
    node_size=cluster_map_sizes,
    bgcolor="#ffffff",
    edge_color="#000000",
    edge_linewidth=0.1,
    figsize=(12, 12),
)
Out[37]:
(<Figure size 1200x1200 with 1 Axes>, <Axes: >)
In [38]:
# Disabled: larger rendering of the cluster map (48x48 figure, node sizes scaled by 8).
# ox.plot_graph(
#     leicester_osmnx_bivariate,
#     node_color=[leicester_osmnx_bivariate.nodes[node]["clust_colour"] for node in leicester_osmnx_bivariate.nodes],
#     node_size=[leicester_osmnx_bivariate.nodes[node]["node_size"]*8 if leicester_osmnx_bivariate.nodes[node]["clust_colour"]!=colorbrewer_set1[-1] else 8 for node in leicester_osmnx_bivariate.nodes],
#     bgcolor="#ffffff", edge_color="#000000", edge_linewidth=0.1,
#     figsize=(48, 48))
In [39]:
# Bivariate map: each node uses the colour and size stored on it as attributes.
bivariate_map_colours = []
bivariate_map_sizes = []
for _, node_attributes in leicester_osmnx_bivariate.nodes(data=True):
    bivariate_map_colours.append(node_attributes["bivariate_colour"])
    bivariate_map_sizes.append(node_attributes["node_size"])

ox.plot_graph(
    leicester_osmnx_bivariate,
    node_color=bivariate_map_colours,
    node_size=bivariate_map_sizes,
    bgcolor="#ffffff",
    edge_color="#000000",
    edge_linewidth=0.1,
    figsize=(12, 12),
)
Out[39]:
(<Figure size 1200x1200 with 1 Axes>, <Axes: >)
In [40]:
# Disabled: larger rendering of the bivariate map (24x24 figure, node sizes doubled).
# ox.plot_graph(
#     leicester_osmnx_bivariate,
#     node_color=[leicester_osmnx_bivariate.nodes[node]["bivariate_colour"] for node in leicester_osmnx_bivariate.nodes],
#     node_size=[leicester_osmnx_bivariate.nodes[node]["node_size"]*2 for node in leicester_osmnx_bivariate.nodes],
#     bgcolor="#ffffff", edge_color="#000000", edge_linewidth=0.1,
#     figsize=(24, 24))
In [41]:
import geopandas as gpd

# Point geometries are built from the lon/lat columns, hence the EPSG:4326 CRS.
node_points = gpd.points_from_xy(
    x=leicester_osmnx_graph_prj_df["lon"],
    y=leicester_osmnx_graph_prj_df["lat"],
)
leicester_nodes_gdf = gpd.GeoDataFrame(
    leicester_osmnx_graph_prj_df,
    geometry=node_points,
    crs="EPSG:4326",
)

# Left join keeps every graph node; nodes without a row in the patterns
# dataframe get NaN in the embedding and colour columns.
leicester_gdf = leicester_nodes_gdf.merge(
    leicester_emb_patters_df,
    on="osmnx_node_id",
    how="left",
)
leicester_gdf.head()
Out[41]:
y x street_count elevation elevation_aster elevation_srtm lon lat osmnx_node_id ref highway geometry EMB000 EMB001 clust clust_colour bivariate_colour
0 5.829804e+06 622151.977595 3 72.0 35 72 -1.196195 52.604506 194739 NaN NaN POINT (-1.19620 52.60451) NaN NaN NaN NaN NaN
1 5.829991e+06 622098.041002 3 72.0 45 72 -1.196922 52.606196 1551014281 NaN NaN POINT (-1.19692 52.60620) NaN NaN NaN NaN NaN
2 5.828827e+06 622259.813792 2 79.0 57 79 -1.194965 52.595696 326312 21 motorway_junction POINT (-1.19497 52.59570) NaN NaN NaN NaN NaN
3 5.830107e+06 622077.742140 3 79.0 43 79 -1.197179 52.607245 326320 21 motorway_junction POINT (-1.19718 52.60724) NaN NaN NaN NaN NaN
4 5.829673e+06 622220.645785 3 74.0 35 74 -1.195230 52.603314 2627867454 NaN NaN POINT (-1.19523 52.60331) 0.435371 -0.637928 -1.0 #999999 #b0d5df
In [42]:
# Interactive map of clustered nodes only: rows carrying the noise colour
# (last palette entry) and rows without an EMB000 value are left out.
# NOTE(review): the "Stamen Toner" basemap name may not resolve with newer
# folium/xyzservices releases - confirm before re-running.
is_not_noise = leicester_gdf["clust_colour"] != colorbrewer_set1[-1]
leicester_gdf.loc[is_not_noise].dropna(subset=["EMB000"]).explore(
    color="clust_colour",
    marker_kwds=dict(radius=7),
    style_kwds=dict(stroke=False),
    tiles="Stamen Toner",
)
Out[42]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [43]:
# Interactive map coloured by bivariate class: rows whose colour is the black
# placeholder and rows without an EMB000 value are left out.
# NOTE(review): the "Stamen Toner" basemap name may not resolve with newer
# folium/xyzservices releases - confirm before re-running.
is_not_placeholder = leicester_gdf["bivariate_colour"] != "#000000"
leicester_gdf.loc[is_not_placeholder].dropna(subset=["EMB000"]).explore(
    color="bivariate_colour",
    marker_kwds=dict(radius=7),
    style_kwds=dict(stroke=False),
    legend=True,
    tiles="Stamen Toner",
)
Out[43]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Comparison: centrality plots and maps¶

In [44]:
# Separate graph copy to carry the centrality attributes written in the cells below.
leicester_osmnx_centrality = leicester_osmnx_graph_prj.copy(as_view=False)
In [45]:
# Break points at the 1/3 and 2/3 quantiles of the network-wide centralities:
# row 0 for closeness, row 1 for betweenness.
leicester_centralities_networkwide_quantiles = (
    leicester_emb_stats_for_corr[["closeness_networkwide", "betweenness_networkwide"]]
    .quantile([1/3, 2/3])
    .to_numpy()
    .T
)

# Bivariate colour class of every row (closeness first, betweenness second).
leicester_emb_stats_for_corr["bivariate_centrality_networkwide"] = leicester_emb_stats_for_corr.apply(
    lambda row: bivariate_colour(
        [row["closeness_networkwide"], row["betweenness_networkwide"]],
        leicester_centralities_networkwide_quantiles,
    ),
    axis=1,
)
In [46]:
# Break points at the 1/3 and 2/3 quantiles of the ego-graph centralities:
# row 0 for closeness, row 1 for betweenness.
leicester_centralities_egograph_quantiles = (
    leicester_emb_stats_for_corr[["closeness_egograph", "betweenness_egograph"]]
    .quantile([1/3, 2/3])
    .to_numpy()
    .T
)

# Bivariate colour class of every row (closeness first, betweenness second).
leicester_emb_stats_for_corr["bivariate_centrality_egograph"] = leicester_emb_stats_for_corr.apply(
    lambda row: bivariate_colour(
        [row["closeness_egograph"], row["betweenness_egograph"]],
        leicester_centralities_egograph_quantiles,
    ),
    axis=1,
)
In [47]:
# Attributes given to nodes that have no row in leicester_emb_stats_for_corr:
# no centrality values, and black as the bivariate colour.
centrality_defaults = {
    # networkwide
    "closeness_networkwide": None,
    "betweenness_networkwide": None,
    "bivariate_centrality_networkwide": "#000000",
    # egograph
    "closeness_egograph": None,
    "betweenness_egograph": None,
    "bivariate_centrality_egograph": "#000000",
}

# Index the statistics by node id once, so each node costs a dictionary lookup
# instead of a membership scan plus six boolean-mask scans of the dataframe.
# keep="first" matches taking the first matching row for a repeated node id.
centrality_lookup = (
    leicester_emb_stats_for_corr
    .drop_duplicates(subset="osmnx_node_id", keep="first")
    .set_index("osmnx_node_id")[list(centrality_defaults)]
    .to_dict(orient="index")
)

for node, node_attributes in leicester_osmnx_centrality.nodes(data=True):
    node_attributes.update(centrality_lookup.get(node, centrality_defaults))

City-wide centrality¶

In [48]:
# Network-wide closeness against betweenness (log-scaled y axis),
# coloured by bivariate centrality class.
ax = plt.figure(figsize=(7, 7)).add_subplot()
ax.set_facecolor("white")
ax.set_yscale("log")
ax.scatter(
    x=leicester_emb_stats_for_corr["closeness_networkwide"],
    y=leicester_emb_stats_for_corr["betweenness_networkwide"],
    c=leicester_emb_stats_for_corr["bivariate_centrality_networkwide"],
    s=10,
    edgecolors="black",
    linewidth=0.1,
)
ax.set_xlabel("closeness_networkwide")
ax.set_ylabel("betweenness_networkwide")
plt.show()
In [49]:
# Map of the network-wide bivariate centrality class; nodes carrying the black
# placeholder colour are drawn at size 1, all others at size 7.
networkwide_map_colours = [
    node_attributes["bivariate_centrality_networkwide"]
    for _, node_attributes in leicester_osmnx_centrality.nodes(data=True)
]
networkwide_map_sizes = [
    1 if colour == "#000000" else 7
    for colour in networkwide_map_colours
]

ox.plot_graph(
    leicester_osmnx_centrality,
    node_color=networkwide_map_colours,
    node_size=networkwide_map_sizes,
    bgcolor="#ffffff",
    edge_color="#000000",
    edge_linewidth=0.1,
    figsize=(12, 12),
)
Out[49]:
(<Figure size 1200x1200 with 1 Axes>, <Axes: >)

Ego-graph centrality¶

In [50]:
# Ego-graph closeness against betweenness (log-scaled y axis),
# coloured by bivariate centrality class.
ax = plt.figure(figsize=(7, 7)).add_subplot()
ax.set_facecolor("white")
ax.set_yscale("log")
ax.scatter(
    x=leicester_emb_stats_for_corr["closeness_egograph"],
    y=leicester_emb_stats_for_corr["betweenness_egograph"],
    c=leicester_emb_stats_for_corr["bivariate_centrality_egograph"],
    s=10,
    edgecolors="black",
    linewidth=0.1,
)
ax.set_xlabel("closeness_egograph")
ax.set_ylabel("betweenness_egograph")
plt.show()
In [51]:
# Map of the ego-graph bivariate centrality class; nodes carrying the black
# placeholder colour are drawn at size 1, all others at size 7.
egograph_map_colours = [
    node_attributes["bivariate_centrality_egograph"]
    for _, node_attributes in leicester_osmnx_centrality.nodes(data=True)
]
egograph_map_sizes = [
    1 if colour == "#000000" else 7
    for colour in egograph_map_colours
]

ox.plot_graph(
    leicester_osmnx_centrality,
    node_color=egograph_map_colours,
    node_size=egograph_map_sizes,
    bgcolor="#ffffff",
    edge_color="#000000",
    edge_linewidth=0.1,
    figsize=(12, 12),
)
Out[51]:
(<Figure size 1200x1200 with 1 Axes>, <Axes: >)
In [51]: